This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
```r
library(xgboost)
<!-- rnb-source-end -->
<!-- rnb-output-begin eyJkYXRhIjoiUmVnaXN0ZXJlZCBTMyBtZXRob2Qgb3ZlcndyaXR0ZW4gYnkgJ2RhdGEudGFibGUnOlxuICBtZXRob2QgICAgICAgICAgIGZyb21cbiAgcHJpbnQuZGF0YS50YWJsZSAgICAgXG4ifQ== -->
Registered S3 method overwritten by ‘data.table’: method from print.data.table
<!-- rnb-output-end -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuYGBgclxubGlicmFyeShNYXRyaXgpXG5saWJyYXJ5KG1jbHVzdClcbmBgYFxuYGBgIn0= -->
```r
```r
library(Matrix)
library(mclust)
<!-- rnb-source-end -->
<!-- rnb-output-begin eyJkYXRhIjoiICAgIF9fICBfX19fX19fX19fXyAgICBfXyAgX19fX19fX19fX19fX1xuICAgLyAgfC8gIC8gX19fXy8gLyAgIC8gLyAvIC8gX19fL18gIF9fL1xuICAvIC98Xy8gLyAvICAgLyAvICAgLyAvIC8gL1xcX18gXFwgLyAvICAgXG4gLyAvICAvIC8gL19fXy8gL19fXy8gL18vIC9fX18vIC8vIC8gICAgXG4vXy8gIC9fL1xcX19fXy9fX19fXy9cXF9fX18vL19fX18vL18vICAgIHZlcnNpb24gNS40LjlcblR5cGUgJ2NpdGF0aW9uKFxcbWNsdXN0XFwpJyBmb3IgY2l0aW5nIHRoaXMgUiBwYWNrYWdlIGluIHB1YmxpY2F0aW9ucy5cbiJ9 -->
__ ___________ __ _____________
/ |/ / ____/ / / / / / / / / /|/ / / / / / / / /_ / /
/ / / / // // // /_/ // /
// //__/_/_//____//_/ version 5.4.9 Type ‘citation()’ for citing this R package in publications.
<!-- rnb-output-end -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuYGBgclxubGlicmFyeSh0aWR5dmVyc2UpXG5gYGBcbmBgYCJ9 -->
```r
```r
library(tidyverse)
<!-- rnb-source-end -->
<!-- rnb-output-begin eyJkYXRhIjoiUmVnaXN0ZXJlZCBTMyBtZXRob2RzIG92ZXJ3cml0dGVuIGJ5ICdkYnBseXInOlxuICBtZXRob2QgICAgICAgICBmcm9tXG4gIHByaW50LnRibF9sYXp5ICAgICBcbiAgcHJpbnQudGJsX3NxbCAgICAgIFxu4pSAIEF0dGFjaGluZyBwYWNrYWdlcyDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIAgdGlkeXZlcnNlIDEuMy4xIOKUgFxu4pyTIGdncGxvdDIgMy4zLjUgICAgIOKckyBwdXJyciAgIDAuMy40XG7inJMgdGliYmxlICAzLjEuNSAgICAg4pyTIGRwbHlyICAgMS4wLjdcbuKckyB0aWR5ciAgIDEuMS40ICAgICDinJMgc3RyaW5nciAxLjQuMFxu4pyTIHJlYWRyICAgMi4wLjIgICAgIOKckyBmb3JjYXRzIDAuNS4xXG7ilIAgQ29uZmxpY3RzIOKUgOKUgOKUgOKUgOKUgOKUgOKUgOKUgOKUgOKUgOKUgOKUgOKUgOKUgOKUgOKUgOKUgOKUgOKUgOKUgOKUgOKUgOKUgOKUgOKUgOKUgOKUgOKUgOKUgOKUgOKUgOKUgOKUgOKUgOKUgOKUgCB0aWR5dmVyc2VfY29uZmxpY3RzKCkg4pSAXG54IHRpZHlyOjpleHBhbmQoKSBtYXNrcyBNYXRyaXg6OmV4cGFuZCgpXG54IGRwbHlyOjpmaWx0ZXIoKSBtYXNrcyBzdGF0czo6ZmlsdGVyKClcbnggZHBseXI6OmxhZygpICAgIG1hc2tzIHN0YXRzOjpsYWcoKVxueCBwdXJycjo6bWFwKCkgICAgbWFza3MgbWNsdXN0OjptYXAoKVxueCB0aWR5cjo6cGFjaygpICAgbWFza3MgTWF0cml4OjpwYWNrKClcbnggZHBseXI6OnNsaWNlKCkgIG1hc2tzIHhnYm9vc3Q6OnNsaWNlKClcbnggdGlkeXI6OnVucGFjaygpIG1hc2tzIE1hdHJpeDo6dW5wYWNrKClcbiJ9 -->
Registered S3 methods overwritten by ‘dbplyr’: method from print.tbl_lazy
print.tbl_sql
─ Attaching packages ─────────────────────────────────── tidyverse 1.3.1 ─ ✓ ggplot2 3.3.5 ✓ purrr 0.3.4 ✓ tibble 3.1.5 ✓ dplyr 1.0.7 ✓ tidyr 1.1.4 ✓ stringr 1.4.0 ✓ readr 2.0.2 ✓ forcats 0.5.1 ─ Conflicts ──────────────────────────────────── tidyverse_conflicts() ─ x tidyr::expand() masks Matrix::expand() x dplyr::filter() masks stats::filter() x dplyr::lag() masks stats::lag() x purrr::map() masks mclust::map() x tidyr::pack() masks Matrix::pack() x dplyr::slice() masks xgboost::slice() x tidyr::unpack() masks Matrix::unpack()
<!-- rnb-output-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuYGBgclxuZHMwIDwtIHJlYWRSRFMoXFwuL2RzMC5yZHNcXClcbmRzMSA8LSByZWFkUkRTKFxcLi9kczEucmRzXFwpXG5gYGBcbmBgYCJ9 -->
```r
```r
ds0 <- readRDS(\./ds0.rds\)
ds1 <- readRDS(\./ds1.rds\)
<!-- rnb-source-end -->
<!-- rnb-output-begin eyJkYXRhIjoiTG9hZGluZyByZXF1aXJlZCBwYWNrYWdlOiBTZXVyYXRcblJlZ2lzdGVyZWQgUzMgbWV0aG9kIG92ZXJ3cml0dGVuIGJ5ICdodG1sd2lkZ2V0cyc6XG4gIG1ldGhvZCAgICAgICAgICAgZnJvbSAgICAgICAgIFxuICBwcmludC5odG1sd2lkZ2V0IHRvb2xzOnJzdHVkaW9cblJlZ2lzdGVyZWQgUzMgbWV0aG9kIG92ZXJ3cml0dGVuIGJ5ICdzcGF0c3RhdCc6XG4gIG1ldGhvZCAgICAgZnJvbVxuICBwcmludC5ib3h4IGNsaSBcbiJ9 -->
Loading required package: Seurat Registered S3 method overwritten by ‘htmlwidgets’: method from
print.htmlwidget tools:rstudio Registered S3 method overwritten by ‘spatstat’: method from print.boxx cli
<!-- rnb-output-end -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuYGBgclxuZHMyIDwtIHJlYWRSRFMoXFwuL2RzMi5yZHNcXClcbmBgYFxuYGBgIn0= -->
```r
```r
ds2 <- readRDS(\./ds2.rds\)
<!-- rnb-source-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
# 分发训练集
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuYGBgclxuIyBJZGVudHMoZHMyKSA8LSBkczIkY29uZGl0aW9uc1xuIyBkczJfQUMgPC0gc3Vic2V0KGRzMiwgaWRlbnRzID0gXFxBQ1xcKVxuIyBkczJfUEEgPC0gc3Vic2V0KGRzMiwgaWRlbnRzID0gXFxQQVxcKVxuIyBkczJfQUMgPC0gZHMyX0FDICU+JSBGaW5kTmVpZ2hib3JzKGRpbXMgPSAxOjIwKSAlPiUgRmluZENsdXN0ZXJzKHJlc29sdXRpb24gPSAwLjEpXG4jIGRzMl9QQSA8LSBkczJfUEEgJT4lIEZpbmROZWlnaGJvcnMoZGltcyA9IDE6MjApICU+JSBGaW5kQ2x1c3RlcnMocmVzb2x1dGlvbiA9IDAuMSlcbiMgdW1hcHBsb3QoZHMyX0FDKSArIHNjYWxlX3lfY29udGludW91cyhsaW1pdHMgPSBjKC01LDE1KSxicmVha3MgPSBOVUxMKSArXG4jICAgICAgICAgc2NhbGVfeF9jb250aW51b3VzKGxpbWl0cyA9IGMoLTUsMTUpLGJyZWFrcyA9IE5VTEwpXG4jIHVtYXBwbG90KGRzMl9QQSkrIHNjYWxlX3lfY29udGludW91cyhsaW1pdHMgPSBjKC01LDE1KSxicmVha3MgPSBOVUxMKSArXG4jICAgICAgICAgc2NhbGVfeF9jb250aW51b3VzKGxpbWl0cyA9IGMoLTUsMTUpLGJyZWFrcyA9IE5VTEwpXG4jIFxuIyBBQ19tYXJrZXJzIDwtIEZpbmRBbGxNYXJrZXJzKGRzMl9BQyxsb2dmYy50aHJlc2hvbGQgPSAwLjcsIG1pbi5kaWZmLnBjdCA9IDAuMilcbiMgIyBQQV9tYXJrZXJzIDwtIEZpbmRBbGxNYXJrZXJzKGRzMl9QQSxsb2dmYy50aHJlc2hvbGQgPSAwLjcsIG1pbi5kaWZmLnBjdCA9IDAuMilcbiMgd3JpdGUuY3N2KEFDX21hcmtlcnMsXFxBQ19TTUNfbWFya2Vycy5jc3ZcXClcblxuZHMyX0FDIDwtIHJlYWRSRFMoXFxkczJfQUMucmRzXFwpXG5kczJfUEEgPC0gcmVhZFJEUyhcXGRzMl9QQS5yZHNcXClcblxudW1hcHBsb3QoZHMyX0FDKVxudW1hcHBsb3QoZHMyX1BBKVxuXG5kczJfQUMkQ2xhc3NpZmljYXRpb24gPC0gSWRlbnRzKGRzMl9BQylcbklkZW50cyhkczJfQUMpIDwtIGRzMl9BQyRzZXVyYXRfY2x1c3RlcnNcbmRzMl9BQyA8LSBSZW5hbWVJZGVudHMoZHMyX0FDLCcwJyA9ICczJywnMScgPSAnMScsJzInID0gJzAnLCczJyA9ICcyJylcbklkZW50cyhkczJfQUMpIDwtIGZhY3RvcihJZGVudHMoZHMyX0FDKSxsZXZlbHMgPSBjKDAsMSwyLDMpKVxuZHMyX0FDJHNldXJhdF9jbHVzdGVycyA8LSBJZGVudHMoZHMyX0FDKVxuXG5JZGVudHMoZHMyX1BBKSA8LSBkczJfUEEkc2V1cmF0X2NsdXN0ZXJzXG5kczJfUEEkQ2xhc3NpZmljYXRpb24gPC0gSWRlbnRzKGRzMl9QQSlcblxuXG5gYGBcbmBgYCJ9 -->
```r
```r
# Idents(ds2) <- ds2$conditions
# ds2_AC <- subset(ds2, idents = \AC\)
# ds2_PA <- subset(ds2, idents = \PA\)
# ds2_AC <- ds2_AC %>% FindNeighbors(dims = 1:20) %>% FindClusters(resolution = 0.1)
# ds2_PA <- ds2_PA %>% FindNeighbors(dims = 1:20) %>% FindClusters(resolution = 0.1)
# umapplot(ds2_AC) + scale_y_continuous(limits = c(-5,15),breaks = NULL) +
# scale_x_continuous(limits = c(-5,15),breaks = NULL)
# umapplot(ds2_PA)+ scale_y_continuous(limits = c(-5,15),breaks = NULL) +
# scale_x_continuous(limits = c(-5,15),breaks = NULL)
#
# AC_markers <- FindAllMarkers(ds2_AC,logfc.threshold = 0.7, min.diff.pct = 0.2)
# # PA_markers <- FindAllMarkers(ds2_PA,logfc.threshold = 0.7, min.diff.pct = 0.2)
# write.csv(AC_markers,\AC_SMC_markers.csv\)
ds2_AC <- readRDS(\ds2_AC.rds\)
ds2_PA <- readRDS(\ds2_PA.rds\)
umapplot(ds2_AC)
umapplot(ds2_PA)
ds2_AC$Classification <- Idents(ds2_AC)
Idents(ds2_AC) <- ds2_AC$seurat_clusters
ds2_AC <- RenameIdents(ds2_AC,'0' = '3','1' = '1','2' = '0','3' = '2')
Idents(ds2_AC) <- factor(Idents(ds2_AC),levels = c(0,1,2,3))
ds2_AC$seurat_clusters <- Idents(ds2_AC)
Idents(ds2_PA) <- ds2_PA$seurat_clusters
ds2_PA$Classification <- Idents(ds2_PA)
<!-- rnb-source-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
## 在AC上预训练
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuYGBgclxuZHMyX0FDJENsYXNzaWZpY2F0aW9uIDwtIElkZW50cyhkczJfQUMpXG5JZGVudHMoZHMyX0FDKSA8LSBkczJfQUMkc2V1cmF0X2NsdXN0ZXJzXG5BQ19kYXRhIDwtIGdldF9kYXRhX3RhYmxlKGRzMl9BQywgaGlnaHZhciA9IEYsIHR5cGUgPSBcXGRhdGFcXClcbkFDX2xhYmVsIDwtIGFzLm51bWVyaWMoYXMuY2hhcmFjdGVyKElkZW50cyhkczJfQUMpKSlcblxuc2V0LnNlZWQoNylcbmluZGV4IDwtIGMoMTpkaW0oQUNfZGF0YSlbMl0pICU+JSBzYW1wbGUoY2VpbGluZygwLjMqZGltKEFDX2RhdGEpWzJdKSwgcmVwbGFjZSA9IEYsIHByb2IgPSBOVUxMKVxuXG5jb2xuYW1lcyhBQ19kYXRhKSA8LSBOVUxMXG5cbkFDX3RyYWluX2RhdGEgPC0gbGlzdChkYXRhID0gdChhcyhBQ19kYXRhWywtaW5kZXhdLFxcZGdDTWF0cml4XFwpKSwgbGFiZWwgPSBBQ19sYWJlbFstaW5kZXhdKVxuQUNfdGVzdF9kYXRhIDwtIGxpc3QoZGF0YSA9IHQoYXMoQUNfZGF0YVssaW5kZXhdLFxcZGdDTWF0cml4XFwpKSwgbGFiZWwgPSBBQ19sYWJlbFtpbmRleF0pXG5cbkFDX3RyYWluIDwtIHhnYi5ETWF0cml4KGRhdGEgPSBBQ190cmFpbl9kYXRhJGRhdGEsbGFiZWwgPSBBQ190cmFpbl9kYXRhJGxhYmVsKVxuQUNfdGVzdCA8LSB4Z2IuRE1hdHJpeChkYXRhID0gQUNfdGVzdF9kYXRhJGRhdGEsbGFiZWwgPSBBQ190ZXN0X2RhdGEkbGFiZWwpXG5cbiMgeGdiX3BhcmFtc190cmFpbiA9IHtcbiMgICAgICdvYmplY3RpdmUnOidtdWx0aTpzb2Z0bWF4JyxcbiMgICAgICdldmFsX21ldHJpYyc6J21sb2dsb3NzJyxcbiMgICAgICdudW1fY2xhc3MnOnNlbGYubnVtYmVydHJhaW5jbGFzc2VzLFxuIyAgICAgJ2V0YSc6MC4yLFxuIyAgICAgJ21heF9kZXB0aCc6NixcbiMgICAgICdzdWJzYW1wbGUnOiAwLjZ9XG4jIG5yb3VuZCA9IDEwMFxuXG53YXRjaGxpc3QgPC0gbGlzdCh0cmFpbiA9IEFDX3RyYWluLCBldmFsID0gQUNfdGVzdClcbnhnYl9wYXJhbSA8LSBsaXN0KGV0YSA9IDAuMiwgbWF4X2RlcHRoID0gNiwgXG4gICAgICAgICAgICAgICAgICBzdWJzYW1wbGUgPSAwLjYsICBudW1fY2xhc3MgPSBsZW5ndGgodGFibGUoSWRlbnRzKGRzMl9BQykpKSxcbiAgICAgICAgICAgICAgICAgIG9iamVjdGl2ZSA9IFxcbXVsdGk6c29mdG1heFxcLCBldmFsX21ldHJpYyA9ICdtbG9nbG9zcycpXG5cbmJzdF9tb2RlbCA8LSB4Z2IudHJhaW4oeGdiX3BhcmFtLCBBQ190cmFpbiwgbnJvdW5kcyA9IDEwMCwgd2F0Y2hsaXN0LCB2ZXJib3NlID0gMClcblxuZXZhbF9sb3NzIDwtIGJzdF9tb2RlbFtbXFxldmFsdWF0aW9uX2xvZ1xcXV1bW1xcZXZhbF9tbG9nbG9zc1xcXV1cbnBsb3RfbHkoZGF0YS5mcmFtZShldmFsX2xvc3MpLCB4ID0gYygxOjEwMCksIHkgPSBldmFsX2xvc3MpICU+JSBcbiAgYWRkX3RyYWNlKHR5cGUgPSBcXHNjYXR0ZXJcXCwgbW9kZSA9IFxcbWFya2VycytsaW5lc1xcLCBcbiAgICAgICAgICAgIG1hcmtlciA9IGxpc3QoY29sb3IgPSBcXGJsYWNrXFwsIGxpbmUgPSBsaXN0KGNvbG9yID0gXFwjMUU5MEZGQzdcXCwgd2lkdGggPSAxKSksXG4gICAgICAgICAgICBsaW5lID0gbGlzdChjb2xvciA9IFxcIzFFOTBGRjgwXFwsIHdpZHRoID0gMikpICU+JSBcbiAgbGF5b3V0KHhheGlzID0gbGlzdCh0aXRsZSA9IFxcZXBvY2hcXCkseWF4aXMgPSBsaXN0KHRpdGxlID0gXFxldmFsX21sb2dsb3NzXFwpKVxuXG5gYGBcbmBgYCJ9 -->
```r
```r
ds2_AC$Classification <- Idents(ds2_AC)
Idents(ds2_AC) <- ds2_AC$seurat_clusters
AC_data <- get_data_table(ds2_AC, highvar = F, type = \data\)
AC_label <- as.numeric(as.character(Idents(ds2_AC)))
set.seed(7)
index <- c(1:dim(AC_data)[2]) %>% sample(ceiling(0.3*dim(AC_data)[2]), replace = F, prob = NULL)
colnames(AC_data) <- NULL
AC_train_data <- list(data = t(as(AC_data[,-index],\dgCMatrix\)), label = AC_label[-index])
AC_test_data <- list(data = t(as(AC_data[,index],\dgCMatrix\)), label = AC_label[index])
AC_train <- xgb.DMatrix(data = AC_train_data$data,label = AC_train_data$label)
AC_test <- xgb.DMatrix(data = AC_test_data$data,label = AC_test_data$label)
# xgb_params_train = {
# 'objective':'multi:softmax',
# 'eval_metric':'mlogloss',
# 'num_class':self.numbertrainclasses,
# 'eta':0.2,
# 'max_depth':6,
# 'subsample': 0.6}
# nround = 100
watchlist <- list(train = AC_train, eval = AC_test)
xgb_param <- list(eta = 0.2, max_depth = 6,
subsample = 0.6, num_class = length(table(Idents(ds2_AC))),
objective = \multi:softmax\, eval_metric = 'mlogloss')
bst_model <- xgb.train(xgb_param, AC_train, nrounds = 100, watchlist, verbose = 0)
eval_loss <- bst_model[[\evaluation_log\]][[\eval_mlogloss\]]
plot_ly(data.frame(eval_loss), x = c(1:100), y = eval_loss) %>%
add_trace(type = \scatter\, mode = \markers+lines\,
marker = list(color = \black\, line = list(color = \#1E90FFC7\, width = 1)),
line = list(color = \#1E90FF80\, width = 2)) %>%
layout(xaxis = list(title = \epoch\),yaxis = list(title = \eval_mlogloss\))
<!-- rnb-source-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuYGBgclxuIyDnibnlvoHmj5Dlj5ZcbmltcG9ydGFuY2UgPC0geGdiLmltcG9ydGFuY2UoY29sbmFtZXMoQUNfdHJhaW4pLCBtb2RlbCA9IGJzdF9tb2RlbClcbmhlYWQoaW1wb3J0YW5jZSlcbnhnYi5nZ3Bsb3QuaW1wb3J0YW5jZShoZWFkKGltcG9ydGFuY2UsMjApLCBuX2NsdXN0ZXJzID0gMSkgKyB0aGVtZV9taW5pbWFsKClcblxubXVsdGlfZmVhdHVyZXBsb3QoaGVhZChpbXBvcnRhbmNlLDkpJEZlYXR1cmUsIGRzMl9BQykgXG5BQ19nZW5lcyA8LSBoZWFkKGltcG9ydGFuY2UsIDUwMCkgIyPpgInmi6l0b3A1MDBcblxud3JpdGUuY3N2KEFDX2dlbmVzLCBcXC4vZGF0YXRhYmxlL0FDX2ZlYXR1cmVzLmNzdlxcLCByb3cubmFtZXMgPSBGKVxuXG4j5re35reG55+p6Zi1XG5wcmVkaWN0X0FDX3Rlc3QgPC0gcm91bmQocHJlZGljdChic3RfbW9kZWwsIG5ld2RhdGEgPSBBQ190ZXN0KSlcblxuQUNfY29uZnVzZV9tYXRyaXhfdGVzdCA8LSB0YWJsZShBQ190ZXN0X2RhdGEkbGFiZWwsIHByZWRpY3RfQUNfdGVzdCwgZG5uPWMoXFx0cnVlXFwsXFxwcmVcXCkpXG5BQ19jb25mdXNlX21hdHJpeF90ZXN0X3Byb3AgPC0gcHJvcC50YWJsZShBQ19jb25mdXNlX21hdHJpeF90ZXN0LCAxKVxuQUNfY29uZnVzZV9tYXRyaXhfdGVzdF9wcm9wXG5cbmNvbmZ1c2VfYnViYmxlbWF0KEFDX2NvbmZ1c2VfbWF0cml4X3Rlc3RfcHJvcCwgYyhcXEZpYnJvYmxhc3RcXCwgXFxTTUMxXFwsIFxcRmlicm9teW9jeXRlXFwsIFxcU01DMlxcKSwgYyhcXEZpYnJvYmxhc3RcXCwgXFxTTUMxXFwsIFxcRmlicm9teW9jeXRlXFwsIFxcU01DMlxcKSxcXEFDX3ByZXRyYWluXFwpXG5cbiNST0Pmm7Lnur9cbnhnYm9vc3Rfcm9jIDwtIHBST0M6Om11bHRpY2xhc3Mucm9jKEFDX3Rlc3RfZGF0YSRsYWJlbCwgcHJlZGljdF9BQ190ZXN0KSAj5aSa5YiG57G7Uk9DXG54Z2Jvb3N0X3JvY1tbXFxhdWNcXF1dICPlj6rpnIDopoHov5nkuKrlgLxcbmFkanVzdGVkUmFuZEluZGV4KEFDX3Rlc3RfZGF0YSRsYWJlbCwgcHJlZGljdF9BQ190ZXN0KSAj5YiG57G75Zmo5oCn6IO9XG5gYGBcbmBgYCJ9 -->
```r
```r
# 特征提取
importance <- xgb.importance(colnames(AC_train), model = bst_model)
head(importance)
xgb.ggplot.importance(head(importance,20), n_clusters = 1) + theme_minimal()
multi_featureplot(head(importance,9)$Feature, ds2_AC)
AC_genes <- head(importance, 500) ##选择top500
write.csv(AC_genes, \./datatable/AC_features.csv\, row.names = F)
#混淆矩阵
predict_AC_test <- round(predict(bst_model, newdata = AC_test))
AC_confuse_matrix_test <- table(AC_test_data$label, predict_AC_test, dnn=c(\true\,\pre\))
AC_confuse_matrix_test_prop <- prop.table(AC_confuse_matrix_test, 1)
AC_confuse_matrix_test_prop
confuse_bubblemat(AC_confuse_matrix_test_prop, c(\Fibroblast\, \SMC1\, \Fibromyocyte\, \SMC2\), c(\Fibroblast\, \SMC1\, \Fibromyocyte\, \SMC2\),\AC_pretrain\)
#ROC曲线
xgboost_roc <- pROC::multiclass.roc(AC_test_data$label, predict_AC_test) #多分类ROC
xgboost_roc[[\auc\]] #只需要这个值
adjustedRandIndex(AC_test_data$label, predict_AC_test) #分类器性能
<!-- rnb-source-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
ARI = 0.9585312
## 在PA上训练
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuYGBgclxuc2VsZWN0ZWRfZmVhdHVyZXMgPC0gaW50ZXJzZWN0KFBBX2dlbmVzJEZlYXR1cmUsIEFDX2dlbmVzJEZlYXR1cmUpXG53cml0ZS5jc3Yoc2VsZWN0ZWRfZmVhdHVyZXMsIFxcLi9kYXRhdGFibGUvc2VsZWN0ZWRfZmVhdHVyZXMuY3N2XFwsIHJvdy5uYW1lcyA9IEYpXG5cbnNlbGVjdGVkX2ZlYXR1cmVzIDwtIHJlYWQuY3N2KFxcLi9kYXRhdGFibGUvc2VsZWN0ZWRfZmVhdHVyZXMuY3N2XFwsIHN0cmluZ3NBc0ZhY3RvcnMgPSBGKVxuc2VsZWN0ZWRfZmVhdHVyZXMgPC0gc2VsZWN0ZWRfZmVhdHVyZXMkeFxuUEFfZGF0YSA8LSBnZXRfZGF0YV90YWJsZShkczJfUEEsIGhpZ2h2YXIgPSBGLCB0eXBlID0gXFxkYXRhXFwpXG5QQV9kYXRhIDwtIFBBX2RhdGFbc2VsZWN0ZWRfZmVhdHVyZXMsXVxuUEFfbGFiZWwgPC0gYXMubnVtZXJpYyhhcy5jaGFyYWN0ZXIoSWRlbnRzKGRzMl9QQSkpKVxuY29sbmFtZXMoUEFfZGF0YSkgPC0gTlVMTFxuXG5QQV90cmFpbl9kYXRhIDwtIGxpc3QoZGF0YSA9IHQoYXMoUEFfZGF0YSxcXGRnQ01hdHJpeFxcKSksIGxhYmVsID0gUEFfbGFiZWwpXG5QQV90cmFpbiA8LSB4Z2IuRE1hdHJpeChkYXRhID0gUEFfdHJhaW5fZGF0YSRkYXRhLGxhYmVsID0gUEFfdHJhaW5fZGF0YSRsYWJlbClcbnhnYl9wYXJhbSA8LSBsaXN0KGV0YSA9IDAuMiwgbWF4X2RlcHRoID0gNiwgXG4gICAgICAgICAgICAgICAgICBzdWJzYW1wbGUgPSAwLjYsICBudW1fY2xhc3MgPSBsZW5ndGgodGFibGUoSWRlbnRzKGRzMl9QQSkpKSxcbiAgICAgICAgICAgICAgICAgIG9iamVjdGl2ZSA9IFxcbXVsdGk6c29mdG1heFxcLCBldmFsX21ldHJpYyA9ICdtbG9nbG9zcycpXG5cbmJzdF9tb2RlbCA8LSB4Z2IudHJhaW4oeGdiX3BhcmFtLCBQQV90cmFpbiwgbnJvdW5kcyA9IDEwMCwgdmVyYm9zZSA9IDApXG5cbiMg54m55b6B5o+Q5Y+WXG5pbXBvcnRhbmNlIDwtIHhnYi5pbXBvcnRhbmNlKGNvbG5hbWVzKFBBX3RyYWluKSwgbW9kZWwgPSBic3RfbW9kZWwpXG5oZWFkKGltcG9ydGFuY2UpXG54Z2IuZ2dwbG90LmltcG9ydGFuY2UoaGVhZChpbXBvcnRhbmNlLDIwKSxuX2NsdXN0ZXJzID0gMSkgKyB0aGVtZV9idygpXG53cml0ZS5jc3YoaW1wb3J0YW5jZSwgXFwuL2RhdGF0YWJsZS9QQXRyYWluX2ZlYXR1cmVzLmNzdlxcLCByb3cubmFtZXMgPSBGKVxuXG4jIG11bHRpX2ZlYXR1cmVwbG90KGhlYWQoaW1wb3J0YW5jZSw5KSRGZWF0dXJlLCBkczIpXG5gYGBcbmBgYCJ9 -->
```r
```r
selected_features <- intersect(PA_genes$Feature, AC_genes$Feature)
write.csv(selected_features, \./datatable/selected_features.csv\, row.names = F)
selected_features <- read.csv(\./datatable/selected_features.csv\, stringsAsFactors = F)
selected_features <- selected_features$x
PA_data <- get_data_table(ds2_PA, highvar = F, type = \data\)
PA_data <- PA_data[selected_features,]
PA_label <- as.numeric(as.character(Idents(ds2_PA)))
colnames(PA_data) <- NULL
PA_train_data <- list(data = t(as(PA_data,\dgCMatrix\)), label = PA_label)
PA_train <- xgb.DMatrix(data = PA_train_data$data,label = PA_train_data$label)
xgb_param <- list(eta = 0.2, max_depth = 6,
subsample = 0.6, num_class = length(table(Idents(ds2_PA))),
objective = \multi:softmax\, eval_metric = 'mlogloss')
bst_model <- xgb.train(xgb_param, PA_train, nrounds = 100, verbose = 0)
# 特征提取
importance <- xgb.importance(colnames(PA_train), model = bst_model)
head(importance)
xgb.ggplot.importance(head(importance,20),n_clusters = 1) + theme_bw()
write.csv(importance, \./datatable/PAtrain_features.csv\, row.names = F)
# multi_featureplot(head(importance,9)$Feature, ds2)
<!-- rnb-source-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuYGBgclxuQUNfZGF0YSA8LSBnZXRfZGF0YV90YWJsZShkczJfQUMsIGhpZ2h2YXIgPSBGLCB0eXBlID0gXFxkYXRhXFwpXG5BQ19kYXRhIDwtIEFDX2RhdGFbc2VsZWN0ZWRfZmVhdHVyZXMsXVxuQUNfbGFiZWwgPC0gYXMubnVtZXJpYyhhcy5jaGFyYWN0ZXIoSWRlbnRzKGRzMl9BQykpKVxuY29sbmFtZXMoQUNfZGF0YSkgPC0gTlVMTFxuQUNfdGVzdF9kYXRhIDwtIGxpc3QoZGF0YSA9IHQoYXMoQUNfZGF0YSxcXGRnQ01hdHJpeFxcKSksIGxhYmVsID0gQUNfbGFiZWwpXG5BQ190ZXN0IDwtIHhnYi5ETWF0cml4KGRhdGEgPSBBQ190ZXN0X2RhdGEkZGF0YSxsYWJlbCA9IEFDX3Rlc3RfZGF0YSRsYWJlbClcblxuI+iuoeeul+a3t+a3huefqemYtVxucHJlZGljdF9BQ190ZXN0IDwtIHJvdW5kKHByZWRpY3QoYnN0X21vZGVsLCBuZXdkYXRhID0gQUNfdGVzdCkpXG5BQ19jb25mdXNlX21hdHJpeF90ZXN0IDwtIHRhYmxlKEFDX3Rlc3RfZGF0YSRsYWJlbCwgcHJlZGljdF9BQ190ZXN0LCBkbm49YyhcXHRydWVcXCxcXHByZVxcKSlcbkFDX2NvbmZ1c2VfbWF0cml4X3Rlc3RfcHJvcCA8LSBwcm9wLnRhYmxlKEFDX2NvbmZ1c2VfbWF0cml4X3Rlc3QsMSlcbkFDX2NvbmZ1c2VfbWF0cml4X3Rlc3RfcHJvcCAgI+WIhuaekOWPkeiCsui9qOi/uVxuXG5jb25mdXNlX2J1YmJsZW1hdChBQ19jb25mdXNlX21hdHJpeF90ZXN0X3Byb3AsYyhcXEZpYnJvYmxhc3RcXCwgXFxTTUMxXFwsIFxcRmlicm9teW9jeXRlXFwsIFxcU01DMlxcKSwgYyhcXEZpYnJvbXlvY3l0ZVxcLCBcXFNNQzFcXCwgXFxTTUMyXFwpLCBcXFBBdG9BQ1xcKVxuXG5cbiNST0Pmm7Lnur9cbnhnYm9vc3Rfcm9jIDwtIHBST0M6Om11bHRpY2xhc3Mucm9jKEFDX3Rlc3RfZGF0YSRsYWJlbCwgcHJlZGljdF9BQ190ZXN0KSAj5aSa5YiG57G7Uk9DXG54Z2Jvb3N0X3JvY1tbXFxhdWNcXF1dXG5cbiMg6K6h566XQVJJIFxuYWRqdXN0ZWRSYW5kSW5kZXgocHJlZGljdF9BQ190ZXN0LCBBQ190ZXN0X2RhdGEkbGFiZWwpXG5gYGBcbmBgYCJ9 -->
```r
```r
AC_data <- get_data_table(ds2_AC, highvar = F, type = \data\)
AC_data <- AC_data[selected_features,]
AC_label <- as.numeric(as.character(Idents(ds2_AC)))
colnames(AC_data) <- NULL
AC_test_data <- list(data = t(as(AC_data,\dgCMatrix\)), label = AC_label)
AC_test <- xgb.DMatrix(data = AC_test_data$data,label = AC_test_data$label)
#计算混淆矩阵
predict_AC_test <- round(predict(bst_model, newdata = AC_test))
AC_confuse_matrix_test <- table(AC_test_data$label, predict_AC_test, dnn=c(\true\,\pre\))
AC_confuse_matrix_test_prop <- prop.table(AC_confuse_matrix_test,1)
AC_confuse_matrix_test_prop #分析发育轨迹
confuse_bubblemat(AC_confuse_matrix_test_prop,c(\Fibroblast\, \SMC1\, \Fibromyocyte\, \SMC2\), c(\Fibromyocyte\, \SMC1\, \SMC2\), \PAtoAC\)
#ROC曲线
xgboost_roc <- pROC::multiclass.roc(AC_test_data$label, predict_AC_test) #多分类ROC
xgboost_roc[[\auc\]]
# 计算ARI
adjustedRandIndex(predict_AC_test, AC_test_data$label)
<!-- rnb-source-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
ARI = 0.8821278
## 选择特征common genes of top 500
## 使用所有来自PA的细胞训练分类器
## 应用在AC上,计算ARI
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuYGBgclxuQUNfZGF0YSA8LSBnZXRfZGF0YV90YWJsZShkczJfQUMsIGhpZ2h2YXIgPSBGLCB0eXBlID0gXFxkYXRhXFwpXG5BQ19kYXRhIDwtIEFDX2RhdGFbc2VsZWN0ZWRfZmVhdHVyZXMsXVxuQUNfbGFiZWwgPC0gYXMubnVtZXJpYyhhcy5jaGFyYWN0ZXIoSWRlbnRzKGRzMl9BQykpKVxuY29sbmFtZXMoQUNfZGF0YSkgPC0gTlVMTFxuXG5BQ190cmFpbl9kYXRhIDwtIGxpc3QoZGF0YSA9IHQoYXMoQUNfZGF0YSxcXGRnQ01hdHJpeFxcKSksIGxhYmVsID0gQUNfbGFiZWwpXG5cbkFDX3RyYWluIDwtIHhnYi5ETWF0cml4KGRhdGEgPSBBQ190cmFpbl9kYXRhJGRhdGEsbGFiZWwgPSBBQ190cmFpbl9kYXRhJGxhYmVsKVxuXG54Z2JfQUNyYW0gPC0gbGlzdChldGEgPSAwLjIsIG1heF9kZXB0aCA9IDYsXG4gICAgICAgICAgICAgICAgICBzdWJzYW1wbGUgPSAwLjYsICBudW1fY2xhc3MgPSBsZW5ndGgodGFibGUoSWRlbnRzKGRzMl9BQykpKSxcbiAgICAgICAgICAgICAgICAgIG9iamVjdGl2ZSA9IFxcbXVsdGk6c29mdG1heFxcLCBldmFsX21ldHJpYyA9ICdtbG9nbG9zcycpXG5cbmJzdF9tb2RlbDIgPC0geGdiLnRyYWluKHhnYl9BQ3JhbSwgQUNfdHJhaW4sIG5yb3VuZHMgPSAxMDAsIHZlcmJvc2UgPSAwKVxuXG4jIOeJueW+geaPkOWPllxuaW1wb3J0YW5jZTIgPC0geGdiLmltcG9ydGFuY2UoY29sbmFtZXMoQUNfdHJhaW4pLCBtb2RlbCA9IGJzdF9tb2RlbDIpXG5oZWFkKGltcG9ydGFuY2UyKVxueGdiLmdncGxvdC5pbXBvcnRhbmNlKGhlYWQoaW1wb3J0YW5jZTIsMjApLG5fY2x1c3RlcnMgPSAxKSArIHRoZW1lX2J3KCkrdGhlbWUoXG4gICAgYXhpcy50aXRsZS54ID0gZWxlbWVudF90ZXh0KHNpemUgPSAxNSksIGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KHNpemUgPSA4LCBjb2xvdXIgPSBcXGJsYWNrXFwpLFxuICAgIGF4aXMudGl0bGUueSA9IGVsZW1lbnRfdGV4dChzaXplID0gMTUpLCBheGlzLnRleHQueSA9IGVsZW1lbnRfdGV4dChzaXplID0gMTIsIGNvbG91ciA9IFxcYmxhY2tcXCksXG4gICAgbGVnZW5kLnRleHQgPSBlbGVtZW50X3RleHQoc2l6ZSA9IDIwKSwgbGVnZW5kLnRpdGxlID0gZWxlbWVudF9ibGFuaygpLCBwYW5lbC5ncmlkID0gZWxlbWVudF9ibGFuaygpKVxuXG53cml0ZS5jc3YoaW1wb3J0YW5jZTIsIFxcLi9kYXRhdGFibGUvQUN0cmFpbl9mZWF0dXJlcy5jc3ZcXCwgcm93Lm5hbWVzID0gRilcbm11bHRpX2ZlYXR1cmVwbG90KGhlYWQoaW1wb3J0YW5jZTIsOSkkRmVhdHVyZSwgZHMyX0FDKVxuXG5gYGBcbmBgYCJ9 -->
```r
```r
AC_data <- get_data_table(ds2_AC, highvar = F, type = \data\)
AC_data <- AC_data[selected_features,]
AC_label <- as.numeric(as.character(Idents(ds2_AC)))
colnames(AC_data) <- NULL
AC_train_data <- list(data = t(as(AC_data,\dgCMatrix\)), label = AC_label)
AC_train <- xgb.DMatrix(data = AC_train_data$data,label = AC_train_data$label)
xgb_ACram <- list(eta = 0.2, max_depth = 6,
subsample = 0.6, num_class = length(table(Idents(ds2_AC))),
objective = \multi:softmax\, eval_metric = 'mlogloss')
bst_model2 <- xgb.train(xgb_ACram, AC_train, nrounds = 100, verbose = 0)
# 特征提取
importance2 <- xgb.importance(colnames(AC_train), model = bst_model2)
head(importance2)
xgb.ggplot.importance(head(importance2,20),n_clusters = 1) + theme_bw()+theme(
axis.title.x = element_text(size = 15), axis.text.x = element_text(size = 8, colour = \black\),
axis.title.y = element_text(size = 15), axis.text.y = element_text(size = 12, colour = \black\),
legend.text = element_text(size = 20), legend.title = element_blank(), panel.grid = element_blank())
write.csv(importance2, \./datatable/ACtrain_features.csv\, row.names = F)
multi_featureplot(head(importance2,9)$Feature, ds2_AC)
<!-- rnb-source-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
## 应用到AC上
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuYGBgclxuUEFfZGF0YSA8LSBnZXRfZGF0YV90YWJsZShkczJfUEEsIGhpZ2h2YXIgPSBGLCB0eXBlID0gXFxkYXRhXFwpXG5QQV9kYXRhIDwtIFBBX2RhdGFbc2VsZWN0ZWRfZmVhdHVyZXMsXVxuUEFfbGFiZWwgPC0gYXMubnVtZXJpYyhhcy5jaGFyYWN0ZXIoSWRlbnRzKGRzMl9QQSkpKVxuY29sbmFtZXMoUEFfZGF0YSkgPC0gTlVMTFxuXG5QQV90ZXN0X2RhdGEgPC0gbGlzdChkYXRhID0gdChhcyhQQV9kYXRhLFxcZGdDTWF0cml4XFwpKSwgbGFiZWwgPSBQQV9sYWJlbClcblxuUEFfdGVzdCA8LSB4Z2IuRE1hdHJpeChkYXRhID0gUEFfdGVzdF9kYXRhJGRhdGEsbGFiZWwgPSBQQV90ZXN0X2RhdGEkbGFiZWwpXG5cbiPorqHnrpfmt7fmt4bnn6npmLVcbnByZWRpY3RfUEFfdGVzdCA8LSByb3VuZChwcmVkaWN0KGJzdF9tb2RlbDIsIG5ld2RhdGEgPSBQQV90ZXN0KSlcbiBcblBBX2NvbmZ1c2VfbWF0cml4X3Rlc3QgPC0gdGFibGUoUEFfdGVzdF9kYXRhJGxhYmVsLCBwcmVkaWN0X1BBX3Rlc3QsIGRubj1jKFxcdHJ1ZVxcLFxccHJlXFwpKVxuUEFfY29uZnVzZV9tYXRyaXhfdGVzdF9wcm9wIDwtIHByb3AudGFibGUoUEFfY29uZnVzZV9tYXRyaXhfdGVzdCwxKVxuUEFfY29uZnVzZV9tYXRyaXhfdGVzdF9wcm9wICAj5YiG5p6Q5Y+R6IKy6L2o6L+5XG5cbmNvbmZ1c2VfYnViYmxlbWF0KFBBX2NvbmZ1c2VfbWF0cml4X3Rlc3RfcHJvcCxjKFxcRmlicm9teW9jeXRlXFwsIFxcU01DMVxcLCBcXFNNQzJcXCksYyhcXEZpYnJvYmxhc3RcXCwgXFxTTUMxXFwsIFxcRmlicm9teW9jeXRlXFwsIFxcU01DMlxcKSxzZXNzaW9uID0gXFxBQ3RvUEFcXClcblxuIyDorqHnrpdBUklcbmFkanVzdGVkUmFuZEluZGV4KHByZWRpY3RfUEFfdGVzdCwgUEFfdGVzdF9kYXRhJGxhYmVsKVxuYGBgXG5gYGAifQ== -->
```r
```r
PA_data <- get_data_table(ds2_PA, highvar = F, type = \data\)
PA_data <- PA_data[selected_features,]
PA_label <- as.numeric(as.character(Idents(ds2_PA)))
colnames(PA_data) <- NULL
PA_test_data <- list(data = t(as(PA_data,\dgCMatrix\)), label = PA_label)
PA_test <- xgb.DMatrix(data = PA_test_data$data,label = PA_test_data$label)
#计算混淆矩阵
predict_PA_test <- round(predict(bst_model2, newdata = PA_test))
PA_confuse_matrix_test <- table(PA_test_data$label, predict_PA_test, dnn=c(\true\,\pre\))
PA_confuse_matrix_test_prop <- prop.table(PA_confuse_matrix_test,1)
PA_confuse_matrix_test_prop #分析发育轨迹
confuse_bubblemat(PA_confuse_matrix_test_prop,c(\Fibromyocyte\, \SMC1\, \SMC2\),c(\Fibroblast\, \SMC1\, \Fibromyocyte\, \SMC2\),session = \ACtoPA\)
# 计算ARI
adjustedRandIndex(predict_PA_test, PA_test_data$label)
<!-- rnb-source-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
ARI = 0.3024837
# sankey plot
PA -> AC
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuYGBgclxuSWRlbnRzKGRzMikgPC0gZHMyJHNldXJhdF9jbHVzdGVyc1xuZHMyX2RhdGEgPC0gZ2V0X2RhdGFfdGFibGUoZHMyLCBoaWdodmFyID0gRiwgdHlwZSA9IFxcZGF0YVxcKVxuZHMyX2xhYmVsIDwtIGFzLm51bWVyaWMoYXMuY2hhcmFjdGVyKElkZW50cyhkczIpKSlcblxuaW5kZXggPC0gYygxOmRpbShkczJfZGF0YSlbMl0pICU+JSBzYW1wbGUoY2VpbGluZygwLjMqZGltKGRzMl9kYXRhKVsyXSksIHJlcGxhY2UgPSBGLCBwcm9iID0gTlVMTClcbmNvbG5hbWVzKGRzMl9kYXRhKSA8LSBOVUxMXG5cbmRzMl90cmFpbl9kYXRhIDwtIGxpc3QoZGF0YSA9IHQoYXMoZHMyX2RhdGFbLC1pbmRleF0sXFxkZ0NNYXRyaXhcXCkpLCBsYWJlbCA9IGRzMl9sYWJlbFstaW5kZXhdKVxuZHMyX3Rlc3RfZGF0YSA8LSBsaXN0KGRhdGEgPSB0KGFzKGRzMl9kYXRhWyxpbmRleF0sXFxkZ0NNYXRyaXhcXCkpLCBsYWJlbCA9IGRzMl9sYWJlbFtpbmRleF0pXG5cbmRzMl90cmFpbiA8LSB4Z2IuRE1hdHJpeChkYXRhID0gZHMyX3RyYWluX2RhdGEkZGF0YSxsYWJlbCA9IGRzMl90cmFpbl9kYXRhJGxhYmVsKVxuZHMyX3Rlc3QgPC0geGdiLkRNYXRyaXgoZGF0YSA9IGRzMl90ZXN0X2RhdGEkZGF0YSxsYWJlbCA9IGRzMl90ZXN0X2RhdGEkbGFiZWwpXG5cbndhdGNobGlzdCA8LSBsaXN0KHRyYWluID0gZHMyX3RyYWluLCBldmFsID0gZHMyX3Rlc3QpXG54Z2JfcGFyYW0gPC0gbGlzdChldGEgPSAwLjIsIG1heF9kZXB0aCA9IDYsIFxuICAgICAgICAgICAgICAgICAgc3Vic2FtcGxlID0gMC42LCAgbnVtX2NsYXNzID0gbGVuZ3RoKHRhYmxlKElkZW50cyhkczIpKSksXG4gICAgICAgICAgICAgICAgICBvYmplY3RpdmUgPSBcXG11bHRpOnNvZnRtYXhcXCwgZXZhbF9tZXRyaWMgPSAnbWxvZ2xvc3MnKVxuXG5ic3RfbW9kZWwgPC0geGdiLnRyYWluKHhnYl9wYXJhbSwgZHMyX3RyYWluLCBucm91bmRzID0gMTAwLCB3YXRjaGxpc3QsIHZlcmJvc2UgPSAwKVxuXG5ldmFsX2xvc3MgPC0gYnN0X21vZGVsW1tcXGV2YWx1YXRpb25fbG9nXFxdXVtbXFxldmFsX21sb2dsb3NzXFxdXVxucGxvdF9seShkYXRhLmZyYW1lKGV2YWxfbG9zcyksIHggPSBjKDE6MTAwKSwgeSA9IGV2YWxfbG9zcykgJT4lIFxuICBhZGRfdHJhY2UodHlwZSA9IFxcc2NhdHRlclxcLCBtb2RlID0gXFxtYXJrZXJzK2xpbmVzXFwsIFxuICAgICAgICAgICAgbWFya2VyID0gbGlzdChjb2xvciA9IFxcYmxhY2tcXCwgbGluZSA9IGxpc3QoY29sb3IgPSBcXCMxRTkwRkZDN1xcLCB3aWR0aCA9IDEpKSxcbiAgICAgICAgICAgIGxpbmUgPSBsaXN0KGNvbG9yID0gXFwjMUU5MEZGODBcXCwgd2lkdGggPSAyKSkgJT4lIFxuICBsYXlvdXQoeGF4aXMgPSBsaXN0KHRpdGxlID0gXFxlcG9jaFxcKSx5YXhpcyA9IGxpc3QodGl0bGUgPSBcXGV2YWxfbWxvZ2xvc3NcXCkpXG5gYGBcbmBgYCJ9 -->
```r
```r
Idents(ds2) <- ds2$seurat_clusters
ds2_data <- get_data_table(ds2, highvar = F, type = \data\)
ds2_label <- as.numeric(as.character(Idents(ds2)))
index <- c(1:dim(ds2_data)[2]) %>% sample(ceiling(0.3*dim(ds2_data)[2]), replace = F, prob = NULL)
colnames(ds2_data) <- NULL
ds2_train_data <- list(data = t(as(ds2_data[,-index],\dgCMatrix\)), label = ds2_label[-index])
ds2_test_data <- list(data = t(as(ds2_data[,index],\dgCMatrix\)), label = ds2_label[index])
ds2_train <- xgb.DMatrix(data = ds2_train_data$data,label = ds2_train_data$label)
ds2_test <- xgb.DMatrix(data = ds2_test_data$data,label = ds2_test_data$label)
watchlist <- list(train = ds2_train, eval = ds2_test)
xgb_param <- list(eta = 0.2, max_depth = 6,
subsample = 0.6, num_class = length(table(Idents(ds2))),
objective = \multi:softmax\, eval_metric = 'mlogloss')
bst_model <- xgb.train(xgb_param, ds2_train, nrounds = 100, watchlist, verbose = 0)
eval_loss <- bst_model[[\evaluation_log\]][[\eval_mlogloss\]]
plot_ly(data.frame(eval_loss), x = c(1:100), y = eval_loss) %>%
add_trace(type = \scatter\, mode = \markers+lines\,
marker = list(color = \black\, line = list(color = \#1E90FFC7\, width = 1)),
line = list(color = \#1E90FF80\, width = 2)) %>%
layout(xaxis = list(title = \epoch\),yaxis = list(title = \eval_mlogloss\))
<!-- rnb-source-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
#把结果投射回umap
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuYGBgclxuIyDnibnlvoHmj5Dlj5ZcbmltcG9ydGFuY2UgPC0geGdiLmltcG9ydGFuY2UoY29sbmFtZXMoZHMyX3RyYWluKSwgbW9kZWwgPSBic3RfbW9kZWwpXG5oZWFkKGltcG9ydGFuY2UpXG54Z2IuZ2dwbG90LmltcG9ydGFuY2UoaGVhZChpbXBvcnRhbmNlLDIwKSwgbl9jbHVzdGVycyA9IDEpICsgdGhlbWVfbWluaW1hbCgpXG5cbm11bHRpX2ZlYXR1cmVwbG90KGhlYWQoaW1wb3J0YW5jZSw5KSRGZWF0dXJlLCBkczIpIFxuZHMyX2dlbmVzIDwtIGhlYWQoaW1wb3J0YW5jZSwgNTAwKSAjI+mAieaLqXRvcDUwMFxuXG53cml0ZS5jc3YoZHMyX2dlbmVzLCBcXC4vZGF0YXRhYmxlL2RzMl9mZWF0dXJlcy5jc3ZcXCwgcm93Lm5hbWVzID0gRilcblxuXG5wcmVkaWN0X2RzMl90ZXN0IDwtIHJvdW5kKHByZWRpY3QoYnN0X21vZGVsLCBuZXdkYXRhID0gZHMyX3Rlc3QpKSAlPiUgXG4j5re35reG55+p6Zi1XG5kczJfY29uZnVzZV9tYXRyaXhfdGVzdCA8LSB0YWJsZShkczJfdGVzdF9kYXRhJGxhYmVsLCBwcmVkaWN0X2RzMl90ZXN0LCBkbm49YyhcXHRydWVcXCxcXHByZVxcKSlcbmRzMl9jb25mdXNlX21hdHJpeF90ZXN0X3Byb3AgPC0gcHJvcC50YWJsZShkczJfY29uZnVzZV9tYXRyaXhfdGVzdCwgMSlcbmRzMl9jb25mdXNlX21hdHJpeF90ZXN0X3Byb3BcblxueCA8LSBjKDA6NClcbnkgPC0gYygwOjQpXG5jb25mdXNlX2J1YmJsZW1hdChkczJfY29uZnVzZV9tYXRyaXhfdGVzdF9wcm9wLHgseSxcXGRzMl90cmFpblxcKVxuXG5cbiNST0Pmm7Lnur9cbnhnYm9vc3Rfcm9jIDwtIHBST0M6Om11bHRpY2xhc3Mucm9jKGRzMl90ZXN0X2RhdGEkbGFiZWwsIHByZWRpY3RfZHMyX3Rlc3QpICPlpJrliIbnsbtST0NcbnhnYm9vc3Rfcm9jW1tcXGF1Y1xcXV0gI+WPqumcgOimgei/meS4quWAvFxuYWRqdXN0ZWRSYW5kSW5kZXgoZHMyX3Rlc3RfZGF0YSRsYWJlbCwgcHJlZGljdF9kczJfdGVzdCkgI+WIhuexu+WZqOaAp+iDvVxuYGBgXG5gYGAifQ== -->
```r
```r
# 特征提取
importance <- xgb.importance(colnames(ds2_train), model = bst_model)
head(importance)
xgb.ggplot.importance(head(importance,20), n_clusters = 1) + theme_minimal()
multi_featureplot(head(importance,9)$Feature, ds2)
ds2_genes <- head(importance, 500) ##选择top500
write.csv(ds2_genes, \./datatable/ds2_features.csv\, row.names = F)
predict_ds2_test <- round(predict(bst_model, newdata = ds2_test)) %>%
#混淆矩阵
ds2_confuse_matrix_test <- table(ds2_test_data$label, predict_ds2_test, dnn=c(\true\,\pre\))
ds2_confuse_matrix_test_prop <- prop.table(ds2_confuse_matrix_test, 1)
ds2_confuse_matrix_test_prop
x <- c(0:4)
y <- c(0:4)
confuse_bubblemat(ds2_confuse_matrix_test_prop,x,y,\ds2_train\)
#ROC曲线
xgboost_roc <- pROC::multiclass.roc(ds2_test_data$label, predict_ds2_test) #多分类ROC
xgboost_roc[[\auc\]] #只需要这个值
adjustedRandIndex(ds2_test_data$label, predict_ds2_test) #分类器性能
<!-- rnb-source-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
# 反着做
# 选择特征common genes of top 500
## 使用所有来自AC的细胞训练分类器
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuYGBgclxudGVtcCA8LSBnZXRfZGF0YV90YWJsZShkczEsIGhpZ2h2YXIgPSBULCB0eXBlID0gXFxkYXRhXFwpXG5kczFfZGF0YSA8LSBtYXRyaXgoZGF0YT0wLCBucm93ID0gZGltKGRzMl9kYXRhKVsxXSwgbmNvbCA9IGxlbmd0aChjb2xuYW1lcyh0ZW1wKSksIFxuICAgICAgICAgICAgICAgICAgIGJ5cm93ID0gRkFMU0UsIGRpbW5hbWVzID0gbGlzdChyb3duYW1lcyhkczJfZGF0YSksY29sbmFtZXModGVtcCkpKVxuZm9yKGkgaW4gaW50ZXJzZWN0KHJvd25hbWVzKGRzMl9kYXRhKSwgcm93bmFtZXModGVtcCkpKXtcbiAgZHMxX2RhdGFbaSxdIDwtIHRlbXBbaSxdXG59XG5ybSh0ZW1wKVxuZHMxX2xhYmVsIDwtIGFzLm51bWVyaWMoYXMuY2hhcmFjdGVyKElkZW50cyhkczEpKSlcbmNvbG5hbWVzKGRzMV9kYXRhKSA8LSBOVUxMXG5kczFfdGVzdF9kYXRhIDwtIGxpc3QoZGF0YSA9IHQoYXMoZHMxX2RhdGEsXFxkZ0NNYXRyaXhcXCkpLCBsYWJlbCA9IGRzMV9sYWJlbClcbmRzMV90ZXN0IDwtIHhnYi5ETWF0cml4KGRhdGEgPSBkczFfdGVzdF9kYXRhJGRhdGEsbGFiZWwgPSBkczFfdGVzdF9kYXRhJGxhYmVsKVxuXG4j6aKE5rWL57uT5p6cXG5wcmVkaWN0X2RzMV90ZXN0IDwtIHJvdW5kKHByZWRpY3QoYnN0X21vZGVsLCBuZXdkYXRhID0gZHMxX3Rlc3QpKVxuXG4j6K6h566X5re35reG55+p6Zi1XG5kczFfZGF0YV9jb25mdXNlX21hdHJpeF90ZXN0IDwtIHRhYmxlKGRzMV90ZXN0X2RhdGEkbGFiZWwsIHByZWRpY3RfZHMxX3Rlc3QsIGRubj1jKFxcdHJ1ZVxcLFxccHJlXFwpKVxuZHMxX2RhdGFfY29uZnVzZV9tYXRyaXhfdGVzdF9wcm9wIDwtIHByb3AudGFibGUoZHMxX2RhdGFfY29uZnVzZV9tYXRyaXhfdGVzdCwxKVxuXG4j57uY5Yi25re35reG55+p6Zi1XG54IDwtIGMoXFxGaWJyb215b2N5dGVcXCwgXFxTTUMxXFwsIFxcU01DMlxcKVxueSA8LSBjKFxcRmlicm9ibGFzdFxcLCBcXFNNQzFcXCwgXFxGaWJyb215b2N5dGVcXCwgXFxTTUMyXFwpXG5jb25mdXNlX2J1YmJsZW1hdChkczFfZGF0YV9jb25mdXNlX21hdHJpeF90ZXN0X3Byb3AseCx5LFxcZHMydG9kczFcXClcblxuZHMxX2RhdGFfY29uZnVzZV9tYXRyaXhfdGVzdFxuZHMxX2RhdGFfY29uZnVzZV9tYXRyaXhfdGVzdF9wcm9wICAj5YiG5p6Q5Y+R6IKy6L2o6L+5XG4jUk9D5puy57q/XG54Z2Jvb3N0X3JvYyA8LSBwUk9DOjptdWx0aWNsYXNzLnJvYyhkczFfdGVzdF9kYXRhJGxhYmVsLCBwcmVkaWN0X2RzMV90ZXN0KSAj5aSa5YiG57G7Uk9DXG54Z2Jvb3N0X3JvY1tbXFxhdWNcXF1dXG5cbiMg6K6h566XQVJJIFxuYWRqdXN0ZWRSYW5kSW5kZXgocHJlZGljdF9kczFfdGVzdCwgZHMxX3Rlc3RfZGF0YSRsYWJlbClcbmBgYFxuYGBgIn0= -->
```r
```r
temp <- get_data_table(ds1, highvar = T, type = \data\)
ds1_data <- matrix(data=0, nrow = dim(ds2_data)[1], ncol = length(colnames(temp)),
byrow = FALSE, dimnames = list(rownames(ds2_data),colnames(temp)))
for(i in intersect(rownames(ds2_data), rownames(temp))){
ds1_data[i,] <- temp[i,]
}
rm(temp)
ds1_label <- as.numeric(as.character(Idents(ds1)))
colnames(ds1_data) <- NULL
ds1_test_data <- list(data = t(as(ds1_data,\dgCMatrix\)), label = ds1_label)
ds1_test <- xgb.DMatrix(data = ds1_test_data$data,label = ds1_test_data$label)
#预测结果
predict_ds1_test <- round(predict(bst_model, newdata = ds1_test))
#计算混淆矩阵
ds1_data_confuse_matrix_test <- table(ds1_test_data$label, predict_ds1_test, dnn=c(\true\,\pre\))
ds1_data_confuse_matrix_test_prop <- prop.table(ds1_data_confuse_matrix_test,1)
#绘制混淆矩阵
x <- c(\Fibromyocyte\, \SMC1\, \SMC2\)
y <- c(\Fibroblast\, \SMC1\, \Fibromyocyte\, \SMC2\)
confuse_bubblemat(ds1_data_confuse_matrix_test_prop,x,y,\ds2tods1\)
ds1_data_confuse_matrix_test
ds1_data_confuse_matrix_test_prop #分析发育轨迹
#ROC曲线
xgboost_roc <- pROC::multiclass.roc(ds1_test_data$label, predict_ds1_test) #多分类ROC
xgboost_roc[[\auc\]]
# 计算ARI
adjustedRandIndex(predict_ds1_test, ds1_test_data$label)
<!-- rnb-source-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
## 应用在PA上,计算ARI
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuYGBgclxuSWRlbnRzKGRzMSkgPC0gcHJlZGljdF9kczFfdGVzdFxuZHMxJHByZWRpY3RfZHMxX3Rlc3QgPC0gcHJlZGljdF9kczFfdGVzdFxudW1hcHBsb3QoZHMxLGdyb3VwLmJ5ID0gXFxwcmVkaWN0X2RzMV90ZXN0XFwpXG5JZGVudHMoZHMxKSA8LSBkczEkc2V1cmF0X2NsdXN0ZXJzXG5gYGBcbmBgYCJ9 -->
```r
```r
Idents(ds1) <- predict_ds1_test
ds1$predict_ds1_test <- predict_ds1_test
umapplot(ds1,group.by = \predict_ds1_test\)
Idents(ds1) <- ds1$seurat_clusters
<!-- rnb-source-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
ARI = 0.1797689
## 把结果投射回umap
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuYGBgclxueCA8LSBjKFxcZHMwXzBcXCwgXFxkczBfMVxcLCBcXGRzMF8yXFwsIFxcZHMwXzNcXCwgXFxkczBfNFxcKVxueSA8LSBjKFxcQUNfMFxcLCBcXEFDXzFcXCwgXFxBQ18yXFwpXG5cbnByb3AgPC0gYXMubnVtZXJpYyhkczBfZGF0YV9jb25mdXNlX21hdHJpeF90ZXN0X3Byb3ApXG5kYXRhIDwtIGV4cGFuZC5ncmlkKHggPSB4LCB5ID0geSkgJT4lIGJpbmRfY29scyhwcm9wID0gcHJvcClcbnBsb3QgPC0gZ2dwbG90KGRhdGEsIGFlcyh4ID0geCwgeSA9IHksIGNvbG91ciA9IHByb3AsIHNpemUgPSBwcm9wKSkgK1xuICBnZW9tX3BvaW50KCkrXG4gIHNjYWxlX3NpemVfY29udGludW91cyhyYW5nZSA9IGMoMCwgMTApKSArIFxuICBsYWJzKHggPSBcXGNsdXN0ZXJzXFwsIHkgPSBcXGluZmVycmVkIGZyb21cXCkgKyB0aGVtZV9idygpXG5nZ3NhdmUoXFwuL3Bsb3RzL0FDbW9kZWxfaHVtYW5jb3IucG5nXFwsIHBsb3QgPSBwbG90LCBkZXZpY2UgPSBwbmcsIHdpZHRoID0gNSxoZWlnaHQgPSA0KVxuXG5kczBfZGF0YV9jb25mdXNlX21hdHJpeF90ZXN0XG5kczBfZGF0YV9jb25mdXNlX21hdHJpeF90ZXN0X3Byb3AgICPliIbmnpDlj5HogrLovajov7lcblxuI1JPQ+absue6v1xueGdib29zdF9yb2MgPC0gcFJPQzo6bXVsdGljbGFzcy5yb2MoZHMwX3Rlc3RfZGF0YSRsYWJlbCwgcHJlZGljdF9kczBfdGVzdCkgI+WkmuWIhuexu1JPQ1xuXG4jIOiuoeeul0FSSSBcbmFkanVzdGVkUmFuZEluZGV4KHByZWRpY3RfZHMwX3Rlc3QsIGRzMF90ZXN0X2RhdGEkbGFiZWwpXG5gYGBcbmBgYCJ9 -->
```r
```r
x <- c(\ds0_0\, \ds0_1\, \ds0_2\, \ds0_3\, \ds0_4\)
y <- c(\AC_0\, \AC_1\, \AC_2\)
prop <- as.numeric(ds0_data_confuse_matrix_test_prop)
data <- expand.grid(x = x, y = y) %>% bind_cols(prop = prop)
plot <- ggplot(data, aes(x = x, y = y, colour = prop, size = prop)) +
geom_point()+
scale_size_continuous(range = c(0, 10)) +
labs(x = \clusters\, y = \inferred from\) + theme_bw()
ggsave(\./plots/ACmodel_humancor.png\, plot = plot, device = png, width = 5,height = 4)
ds0_data_confuse_matrix_test
ds0_data_confuse_matrix_test_prop #分析发育轨迹
#ROC曲线
xgboost_roc <- pROC::multiclass.roc(ds0_test_data$label, predict_ds0_test) #多分类ROC
# 计算ARI
adjustedRandIndex(predict_ds0_test, ds0_test_data$label)
<!-- rnb-source-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
## sankey plot
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuYGBgclxubGFiZWxzIDwtIGxhcHBseShsZXZlbHMoSWRlbnRzKGRzMl9BQykpLCBwYXN0ZTAsIFxcX0FDXFwpICU+JSBhcy5jaGFyYWN0ZXIoKVxubGFiZWxzMiA8LSBsYXBwbHkobGV2ZWxzKElkZW50cyhkczApKSwgcGFzdGUwLCBcXF9kczBcXCkgJT4lIGFzLmNoYXJhY3RlcigpXG5zb3VyY2VzIDwtIHJlcCgwOihsZW5ndGgobGFiZWxzKS0xKSwgZWFjaCA9IGxlbmd0aChsYWJlbHMyKSkgICPms6jmhI/ov5nph4znmoRlYWNo5ZKMdGltZXPnmoTljLrliKtcbmNvbG9ycyA8LSByZXAoY29sb3JzX2xpc3RbMTpsZW5ndGgobGFiZWxzKV0sIGVhY2ggPSBsZW5ndGgobGFiZWxzMikpXG50YXJnZXRzIDwtIHJlcChsZW5ndGgobGFiZWxzKSswOihsZW5ndGgobGFiZWxzMiktMSksIHRpbWVzID0gbGVuZ3RoKGxhYmVscykpXG5cbnBsb3RfbHkodHlwZSA9IFxcc2Fua2V5XFwsIG9yaWVudGF0aW9uID0gXFxoXFwsXG4gICAgbm9kZSA9IGxpc3QoXG4gICAgICBsYWJlbCA9IGMobGFiZWxzLGxhYmVsczIpLCBcbiAgICAgIGNvbG9yID0gY29sb3JzX2xpc3QsIHBhZCA9IDE1LCB0aGlja25lc3MgPSAzMCxcbiAgICAgIGxpbmUgPSBsaXN0KFxuICAgICAgICBjb2xvciA9IFxcYmxhY2tcXCxcbiAgICAgICAgd2lkdGggPSAxKSksXG4gICAgbGluayA9IGxpc3QoXG4gICAgICBzb3VyY2UgPSBzb3VyY2VzLFxuICAgICAgdGFyZ2V0ID0gdGFyZ2V0cyxcbiAgICAgIHZhbHVlID0gIGFzLm51bWVyaWMoZHMwX2RhdGFfY29uZnVzZV9tYXRyaXhfdGVzdCksXG4gICAgICBjb2xvciA9IGNvbG9yc1xuICAgICAgKSlcbmBgYFxuYGBgIn0= -->
```r
```r
labels <- lapply(levels(Idents(ds2_AC)), paste0, \_AC\) %>% as.character()
labels2 <- lapply(levels(Idents(ds0)), paste0, \_ds0\) %>% as.character()
sources <- rep(0:(length(labels)-1), each = length(labels2)) #注意这里的each和times的区别
colors <- rep(colors_list[1:length(labels)], each = length(labels2))
targets <- rep(length(labels)+0:(length(labels2)-1), times = length(labels))
plot_ly(type = \sankey\, orientation = \h\,
node = list(
label = c(labels,labels2),
color = colors_list, pad = 15, thickness = 30,
line = list(
color = \black\,
width = 1)),
link = list(
source = sources,
target = targets,
value = as.numeric(ds0_data_confuse_matrix_test),
color = colors
))
<!-- rnb-source-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
# varify 部分
病变程度量化
# 数据集CA_dataset1
## ds2全体训练
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuSWRlbnRzKGRzMikgPC0gZHMyJHNldXJhdF9jbHVzdGVyc1xuZHMyX2RhdGEgPC0gZ2V0X2RhdGFfdGFibGUoZHMyLCBoaWdodmFyID0gRiwgdHlwZSA9IFwiZGF0YVwiKVxuZHMyX2xhYmVsIDwtIGFzLm51bWVyaWMoYXMuY2hhcmFjdGVyKElkZW50cyhkczIpKSlcblxuaW5kZXggPC0gYygxOmRpbShkczJfZGF0YSlbMl0pICU+JSBzYW1wbGUoY2VpbGluZygwLjMqZGltKGRzMl9kYXRhKVsyXSksIHJlcGxhY2UgPSBGLCBwcm9iID0gTlVMTClcbmNvbG5hbWVzKGRzMl9kYXRhKSA8LSBOVUxMXG5cbmRzMl90cmFpbl9kYXRhIDwtIGxpc3QoZGF0YSA9IHQoYXMoZHMyX2RhdGFbLC1pbmRleF0sXCJkZ0NNYXRyaXhcIikpLCBsYWJlbCA9IGRzMl9sYWJlbFstaW5kZXhdKVxuZHMyX3Rlc3RfZGF0YSA8LSBsaXN0KGRhdGEgPSB0KGFzKGRzMl9kYXRhWyxpbmRleF0sXCJkZ0NNYXRyaXhcIikpLCBsYWJlbCA9IGRzMl9sYWJlbFtpbmRleF0pXG5cbmRzMl90cmFpbiA8LSB4Z2IuRE1hdHJpeChkYXRhID0gZHMyX3RyYWluX2RhdGEkZGF0YSxsYWJlbCA9IGRzMl90cmFpbl9kYXRhJGxhYmVsKVxuZHMyX3Rlc3QgPC0geGdiLkRNYXRyaXgoZGF0YSA9IGRzMl90ZXN0X2RhdGEkZGF0YSxsYWJlbCA9IGRzMl90ZXN0X2RhdGEkbGFiZWwpXG5cbndhdGNobGlzdCA8LSBsaXN0KHRyYWluID0gZHMyX3RyYWluLCBldmFsID0gZHMyX3Rlc3QpXG54Z2JfcGFyYW0gPC0gbGlzdChldGEgPSAwLjIsIG1heF9kZXB0aCA9IDYsIFxuICAgICAgICAgICAgICAgICAgc3Vic2FtcGxlID0gMC42LCAgbnVtX2NsYXNzID0gbGVuZ3RoKHRhYmxlKElkZW50cyhkczIpKSksXG4gICAgICAgICAgICAgICAgICBvYmplY3RpdmUgPSBcIm11bHRpOnNvZnRtYXhcIiwgZXZhbF9tZXRyaWMgPSAnbWxvZ2xvc3MnKVxuXG5ic3RfbW9kZWwgPC0geGdiLnRyYWluKHhnYl9wYXJhbSwgZHMyX3RyYWluLCBucm91bmRzID0gMTAwLCB3YXRjaGxpc3QsIHZlcmJvc2UgPSAwKVxuXG5ldmFsX2xvc3MgPC0gYnN0X21vZGVsW1tcImV2YWx1YXRpb25fbG9nXCJdXVtbXCJldmFsX21sb2dsb3NzXCJdXVxucGxvdF9seShkYXRhLmZyYW1lKGV2YWxfbG9zcyksIHggPSBjKDE6MTAwKSwgeSA9IGV2YWxfbG9zcykgJT4lIFxuICBhZGRfdHJhY2UodHlwZSA9IFwic2NhdHRlclwiLCBtb2RlID0gXCJtYXJrZXJzK2xpbmVzXCIsIFxuICAgICAgICAgICAgbWFya2VyID0gbGlzdChjb2xvciA9IFwiYmxhY2tcIiwgbGluZSA9IGxpc3QoY29sb3IgPSBcIiMxRTkwRkZDN1wiLCB3aWR0aCA9IDEpKSxcbiAgICAgICAgICAgIGxpbmUgPSBsaXN0KGNvbG9yID0gXCIjMUU5MEZGODBcIiwgd2lkdGggPSAyKSkgJT4lIFxuICBsYXlvdXQoeGF4aXMgPSBsaXN0KHRpdGxlID0gXCJlcG9jaFwiKSx5YXhpcyA9IGxpc3QodGl0bGUgPSBcImV2YWxfbWxvZ2xvc3NcIikpXG5gYGAifQ== -->
```r
Idents(ds2) <- ds2$seurat_clusters
ds2_data <- get_data_table(ds2, highvar = F, type = "data")
ds2_label <- as.numeric(as.character(Idents(ds2)))
index <- c(1:dim(ds2_data)[2]) %>% sample(ceiling(0.3*dim(ds2_data)[2]), replace = F, prob = NULL)
colnames(ds2_data) <- NULL
ds2_train_data <- list(data = t(as(ds2_data[,-index],"dgCMatrix")), label = ds2_label[-index])
ds2_test_data <- list(data = t(as(ds2_data[,index],"dgCMatrix")), label = ds2_label[index])
ds2_train <- xgb.DMatrix(data = ds2_train_data$data,label = ds2_train_data$label)
ds2_test <- xgb.DMatrix(data = ds2_test_data$data,label = ds2_test_data$label)
watchlist <- list(train = ds2_train, eval = ds2_test)
xgb_param <- list(eta = 0.2, max_depth = 6,
subsample = 0.6, num_class = length(table(Idents(ds2))),
objective = "multi:softmax", eval_metric = 'mlogloss')
bst_model <- xgb.train(xgb_param, ds2_train, nrounds = 100, watchlist, verbose = 0)
eval_loss <- bst_model[["evaluation_log"]][["eval_mlogloss"]]
plot_ly(data.frame(eval_loss), x = c(1:100), y = eval_loss) %>%
add_trace(type = "scatter", mode = "markers+lines",
marker = list(color = "black", line = list(color = "#1E90FFC7", width = 1)),
line = list(color = "#1E90FF80", width = 2)) %>%
layout(xaxis = list(title = "epoch"),yaxis = list(title = "eval_mlogloss"))
adjustedRandIndex(ds2_test_data$label, predict_ds2_test) #分类器性能
[1] 0.8831684
```r
lym_PA_data <- get_data_table(lym_ds2_PA, highvar = F, type = \data\)
lym_PA_label <- as.numeric(as.character(Idents(lym_ds2_PA)))
set.seed(7)
index <- c(1:dim(lym_PA_data)[2]) %>% sample(ceiling(0.3*dim(lym_PA_data)[2]), replace = F, prob = NULL)
colnames(lym_PA_data) <- NULL
lym_PA_train_data <- list(data = t(as(lym_PA_data[,-index],\dgCMatrix\)), label = lym_PA_label[-index])
lym_PA_test_data <- list(data = t(as(lym_PA_data[,index],\dgCMatrix\)), label = lym_PA_label[index])
lym_PA_train <- xgb.DMatrix(data = lym_PA_train_data$data,label = lym_PA_train_data$label)
lym_PA_test <- xgb.DMatrix(data = lym_PA_test_data$data,label = lym_PA_test_data$label)
watchlist <- list(train = lym_PA_train, eval = lym_PA_test)
xgb_param <- list(eta = 0.2, max_depth = 6,
subsample = 0.6, num_class = length(table(Idents(lym_ds2_PA))),
objective = \multi:softmax\, eval_metric = 'mlogloss')
bst_model <- xgb.train(xgb_param, lym_PA_train, nrounds = 100, watchlist, verbose = 0)
<!-- rnb-source-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
ARI = 0.2321442
## 投射回umap
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuYGBgclxubGFiZWxzIDwtIGxhcHBseShsZXZlbHMoSWRlbnRzKGx5bV9kczJfUEEpKSwgcGFzdGUwLCBcXF9seW1QQVxcKSAlPiUgYXMuY2hhcmFjdGVyKClcbmxhYmVsczIgPC0gbGFwcGx5KGxldmVscyhJZGVudHMobHltX2RzMl9BQykpLCBwYXN0ZTAsIFxcX2x5bUFDXFwpICU+JSBhcy5jaGFyYWN0ZXIoKVxuc291cmNlcyA8LSByZXAoMDo1LCBlYWNoID0gNSkgICPms6jmhI/ov5nph4znmoRlYWNo5ZKMdGltZXPnmoTljLrliKtcbmNvbG9ycyA8LSByZXAoY29sb3JzX2xpc3RbMTo2XSwgZWFjaCA9IDUpXG50YXJnZXRzIDwtIHJlcCg2OjEwLCB0aW1lcyA9IDYpXG5cbnBsb3RfbHkodHlwZSA9IFxcc2Fua2V5XFwsIG9yaWVudGF0aW9uID0gXFxoXFwsXG4gICAgbm9kZSA9IGxpc3QoXG4gICAgICBsYWJlbCA9IGMobGFiZWxzLGxhYmVsczIpLCBcbiAgICAgIGNvbG9yID0gY29sb3JzX2xpc3QsIHBhZCA9IDE1LCB0aGlja25lc3MgPSAzMCxcbiAgICAgIGxpbmUgPSBsaXN0KFxuICAgICAgICBjb2xvciA9IFxcYmxhY2tcXCxcbiAgICAgICAgd2lkdGggPSAxKSksXG4gICAgbGluayA9IGxpc3QoXG4gICAgICBzb3VyY2UgPSBzb3VyY2VzLFxuICAgICAgdGFyZ2V0ID0gdGFyZ2V0cyxcbiAgICAgIHZhbHVlID0gIGFzLm51bWVyaWMobHltX0FDX2NvbmZ1c2VfbWF0cml4X3Rlc3QpLFxuICAgICAgY29sb3IgPSBjb2xvcnNcbiAgICAgICkpXG5cblxudW1hcHBsb3QobHltX2RzMl9BQylcbnVtYXBwbG90KGx5bV9kczJfUEEpXG5gYGBcbmBgYCJ9 -->
```r
```r
labels <- lapply(levels(Idents(lym_ds2_PA)), paste0, \_lymPA\) %>% as.character()
labels2 <- lapply(levels(Idents(lym_ds2_AC)), paste0, \_lymAC\) %>% as.character()
sources <- rep(0:5, each = 5) #注意这里的each和times的区别
colors <- rep(colors_list[1:6], each = 5)
targets <- rep(6:10, times = 6)
plot_ly(type = \sankey\, orientation = \h\,
node = list(
label = c(labels,labels2),
color = colors_list, pad = 15, thickness = 30,
line = list(
color = \black\,
width = 1)),
link = list(
source = sources,
target = targets,
value = as.numeric(lym_AC_confuse_matrix_test),
color = colors
))
umapplot(lym_ds2_AC)
umapplot(lym_ds2_PA)
<!-- rnb-source-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
# 冠状动脉数据集
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuYGBgclxuc2Fua2V5X3Bsb3QgPC0gZnVuY3Rpb24oY29uZnVzZV9tYXRyaXgsIGxhYmVsMSwgbGFiZWwyLCBzZXNzaW9uID0gXFxzZXNzaW9uXFwpXG57XG4gIHNvdXJjZXMgPC0gcmVwKDA6KGxlbmd0aChsYWJlbDEpLTEpLCBlYWNoID0gbGVuZ3RoKGxhYmVsMikpICAj5rOo5oSP6L+Z6YeM55qEZWFjaOWSjHRpbWVz55qE5Yy65YirXG4gIGNvbG9ycyA8LSByZXAoYWVyb19jb2xvcnNfbGlzdFsxOmxlbmd0aChsYWJlbDEpXSwgZWFjaCA9IGxlbmd0aChsYWJlbDIpKVxuICB0YXJnZXRzIDwtIHJlcChsZW5ndGgobGFiZWwxKSswOihsZW5ndGgobGFiZWwyKS0xKSwgdGltZXMgPSBsZW5ndGgobGFiZWwxKSlcblxuICBwbG90X2x5KHR5cGUgPSBcXHNhbmtleVxcLCBvcmllbnRhdGlvbiA9IFxcaFxcLFxuICAgICAgbm9kZSA9IGxpc3QoXG4gICAgICAgIGxhYmVsID0gYyhsYWJlbDEsbGFiZWwyKSwgXG4gICAgICAgIGNvbG9yID0gY29sb3JzX2xpc3QsIHBhZCA9IDE1LCB0aGlja25lc3MgPSAzMCxcbiAgICAgICAgbGluZSA9IGxpc3QoY29sb3IgPSBcXGJsYWNrXFwsIHdpZHRoID0gMSkpLFxuICAgICAgbGluayA9IGxpc3QoXG4gICAgICAgIHNvdXJjZSA9IHNvdXJjZXMsIHRhcmdldCA9IHRhcmdldHMsXG4gICAgICAgIHZhbHVlID0gIGFzLm51bWVyaWMoY29uZnVzZV9tYXRyaXgpLFxuICAgICAgICBjb2xvciA9IGNvbG9yc1xuICAgICAgICApKSAlPiUgbGF5b3V0KHRpdGxlPXNlc3Npb24sIGZvbnQ9bGlzdChmYW1pbHkgPSBcXEFyaWFsXFwsc2l6ZSA9IDIwLCBjb2xvciA9ICdibGFjaycpKVxufVxuXG5cbmNvbmZ1c2VfYnViYmxlbWF0IDwtIGZ1bmN0aW9uKGNvbmZ1c2VfbWF0cml4X3Byb3AsIGxhYmVsMSwgbGFiZWwyLCBzZXNzaW9uID0gXFxzZXNzaW9uXFwpXG57XG5wcm9wIDwtIGFzLm51bWVyaWMoY29uZnVzZV9tYXRyaXhfcHJvcClcbmRhdGEgPC0gZXhwYW5kLmdyaWQoeCA9IGxhYmVsMSwgeSA9IGxhYmVsMikgJT4lIGJpbmRfY29scyhwcm9wID0gcHJvcClcbnBsb3QgPC0gZ2dwbG90KGRhdGEsIGFlcyh4ID0geCwgeSA9IHksIGNvbG91ciA9IHByb3AsIHNpemUgPSBwcm9wKSkgK1xuICBnZW9tX3BvaW50KCkrXG4gIHNjYWxlX3NpemVfY29udGludW91cyhyYW5nZSA9IGMoMCwgMTApKSArIFxuICBsYWJzKHggPSBcXGNsdXN0ZXJzXFwsIHkgPSBcXGluZmVycmVkIGZyb21cXCkgKyB0aGVtZV9idygpXG5cbmdnc2F2ZShwYXN0ZTAoc2Vzc2lvbiwgXFwuc3ZnXFwpLCBwbG90ID0gcGxvdCwgZGV2aWNlID0gc3ZnLCB3aWR0aCA9IDUsaGVpZ2h0ID0gNClcbn1cblxuIyMg6L+U5Zue5pyA5aSn55qE5qaC546H5a+55bqU55qEaW5kZXhcbmZ1bmMgPC0gZnVuY3Rpb24ocywgaWRlbnQpXG57XG4gIGlmKG1heChzKT4xLjIvbGVuZ3RoKGlkZW50KSlcbiAgICByZXR1cm4oaWRlbnRbd2hpY2gocyA9PSBtYXgocykpXSlcbiAgZWxzZVxuICAgIHJldHVybihcXHVuYXNzaWduZWRcXClcbn1cblxuYGBgXG5gYGAifQ== -->
```r
```r
sankey_plot <- function(confuse_matrix, label1, label2, session = \session\)
{
sources <- rep(0:(length(label1)-1), each = length(label2)) #注意这里的each和times的区别
colors <- rep(aero_colors_list[1:length(label1)], each = length(label2))
targets <- rep(length(label1)+0:(length(label2)-1), times = length(label1))
plot_ly(type = \sankey\, orientation = \h\,
node = list(
label = c(label1,label2),
color = colors_list, pad = 15, thickness = 30,
line = list(color = \black\, width = 1)),
link = list(
source = sources, target = targets,
value = as.numeric(confuse_matrix),
color = colors
)) %>% layout(title=session, font=list(family = \Arial\,size = 20, color = 'black'))
}
confuse_bubblemat <- function(confuse_matrix_prop, label1, label2, session = \session\)
{
prop <- as.numeric(confuse_matrix_prop)
data <- expand.grid(x = label1, y = label2) %>% bind_cols(prop = prop)
plot <- ggplot(data, aes(x = x, y = y, colour = prop, size = prop)) +
geom_point()+
scale_size_continuous(range = c(0, 10)) +
labs(x = \clusters\, y = \inferred from\) + theme_bw()
ggsave(paste0(session, \.svg\), plot = plot, device = svg, width = 5,height = 4)
}
## 返回最大的概率对应的index
func <- function(s, ident)
{
if(max(s)>1.2/length(ident))
return(ident[which(s == max(s))])
else
return(\unassigned\)
}
<!-- rnb-source-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuYGBgclxuYWVyb19jb2xvcnNfbGlzdCA8LSBhcy5jaGFyYWN0ZXIobGFwcGx5KGNvbG9yc19saXN0LCBwYXN0ZTAsIFxcQTBcXCkpICPpgI/mmI7ljJbpopzoibJcbmBgYFxuYGBgIn0= -->
```r
```r
aero_colors_list <- as.character(lapply(colors_list, paste0, \A0\)) #透明化颜色
<!-- rnb-source-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxueCA8LSBjKFwiZHMwXzBcIiwgXCJkczBfMVwiLCBcImRzMF8yXCIsIFwiZHMwXzNcIiwgXCJkczBfNFwiKVxueSA8LSBjKFwiQUNfMFwiLCBcIkFDXzFcIiwgXCJBQ18yXCIpXG5cbnByb3AgPC0gYXMubnVtZXJpYyhkczBfZGF0YV9jb25mdXNlX21hdHJpeF90ZXN0X3Byb3ApXG5kYXRhIDwtIGV4cGFuZC5ncmlkKHggPSB4LCB5ID0geSkgJT4lIGJpbmRfY29scyhwcm9wID0gcHJvcClcbnBsb3QgPC0gZ2dwbG90KGRhdGEsIGFlcyh4ID0geCwgeSA9IHksIGNvbG91ciA9IHByb3AsIHNpemUgPSBwcm9wKSkgK1xuICBnZW9tX3BvaW50KCkrXG4gIHNjYWxlX3NpemVfY29udGludW91cyhyYW5nZSA9IGMoMCwgMTApKSArIFxuICBsYWJzKHggPSBcImNsdXN0ZXJzXCIsIHkgPSBcImluZmVycmVkIGZyb21cIikgKyB0aGVtZV9idygpXG5nZ3NhdmUoXCIuL3Bsb3RzL0FDbW9kZWxfaHVtYW5jb3IucG5nXCIsIHBsb3QgPSBwbG90LCBkZXZpY2UgPSBwbmcsIHdpZHRoID0gNSxoZWlnaHQgPSA0KVxuXG5kczBfZGF0YV9jb25mdXNlX21hdHJpeF90ZXN0XG5kczBfZGF0YV9jb25mdXNlX21hdHJpeF90ZXN0X3Byb3AgICPliIbmnpDlj5HogrLovajov7lcblxuI1JPQ+absue6v1xueGdib29zdF9yb2MgPC0gcFJPQzo6bXVsdGljbGFzcy5yb2MoZHMwX3Rlc3RfZGF0YSRsYWJlbCwgcHJlZGljdF9kczBfdGVzdCkgI+WkmuWIhuexu1JPQ1xuXG4jIOiuoeeul0FSSSBcbmFkanVzdGVkUmFuZEluZGV4KHByZWRpY3RfZHMwX3Rlc3QsIGRzMF90ZXN0X2RhdGEkbGFiZWwpXG5gYGAifQ== -->
```r
x <- c("ds0_0", "ds0_1", "ds0_2", "ds0_3", "ds0_4")
y <- c("AC_0", "AC_1", "AC_2")
prop <- as.numeric(ds0_data_confuse_matrix_test_prop)
data <- expand.grid(x = x, y = y) %>% bind_cols(prop = prop)
plot <- ggplot(data, aes(x = x, y = y, colour = prop, size = prop)) +
geom_point()+
scale_size_continuous(range = c(0, 10)) +
labs(x = "clusters", y = "inferred from") + theme_bw()
ggsave("./plots/ACmodel_humancor.png", plot = plot, device = png, width = 5,height = 4)
ds0_data_confuse_matrix_test
ds0_data_confuse_matrix_test_prop #分析发育轨迹
#ROC曲线
xgboost_roc <- pROC::multiclass.roc(ds0_test_data$label, predict_ds0_test) #多分类ROC
# 计算ARI
adjustedRandIndex(predict_ds0_test, ds0_test_data$label)
ARI = 0.7047121
labels <- lapply(levels(Idents(ds2_AC)), paste0, "_AC") %>% as.character()
labels2 <- lapply(levels(Idents(ds0)), paste0, "_ds0") %>% as.character()
sources <- rep(0:(length(labels)-1), each = length(labels2)) #注意这里的each和times的区别
colors <- rep(colors_list[1:length(labels)], each = length(labels2))
targets <- rep(length(labels)+0:(length(labels2)-1), times = length(labels))
plot_ly(type = "sankey", orientation = "h",
node = list(
label = c(labels,labels2),
color = colors_list, pad = 15, thickness = 30,
line = list(
color = "black",
width = 1)),
link = list(
source = sources,
target = targets,
value = as.numeric(ds0_data_confuse_matrix_test),
color = colors
))
# load("./init.RData")
multi_featureplot(head(importance2,9)$Feature, ds2_AC)
multi_featureplot(head(importance2,9)$Feature, ds0)
multi_featureplot(head(importance2,9)$Feature, ds1)
f("MYH11", ds2_AC)
umapplot(ds0)
# lym_ds2 <- subset(CA_dataset2, idents = c('0','4','9'))
lym_ds2 <- readRDS("lym_ds2.rds")
Idents(lym_ds2) <- lym_ds2$conditions
lym_ds2_AC <- subset(lym_ds2, idents = "AC")
lym_ds2_PA <- subset(lym_ds2, idents = "PA")
lym_ds2_AC <- lym_ds2_AC %>% FindNeighbors(dims = 1:20) %>% FindClusters(resolution = 0.2)
umapplot(lym_ds2_AC)
lym_ds2_PA <- lym_ds2_PA %>% FindNeighbors(dims = 1:20) %>% FindClusters(resolution = 0.2)
umapplot(lym_ds2_PA)
# ggsave("./supp/lym_ds2_PA.svg", plot = umapplot(lym_ds2_PA), device = svg, width = 7, height = 6)
# ggsave("./supp/lym_ds2_AC.svg", plot = umapplot(lym_ds2_AC), device = svg, width = 7, height = 6)
lym_PA_data <- get_data_table(lym_ds2_PA, highvar = F, type = "data")
lym_PA_label <- as.numeric(as.character(Idents(lym_ds2_PA)))
set.seed(7)
index <- c(1:dim(lym_PA_data)[2]) %>% sample(ceiling(0.3*dim(lym_PA_data)[2]), replace = F, prob = NULL)
colnames(lym_PA_data) <- NULL
lym_PA_train_data <- list(data = t(as(lym_PA_data[,-index],"dgCMatrix")), label = lym_PA_label[-index])
lym_PA_test_data <- list(data = t(as(lym_PA_data[,index],"dgCMatrix")), label = lym_PA_label[index])
lym_PA_train <- xgb.DMatrix(data = lym_PA_train_data$data,label = lym_PA_train_data$label)
lym_PA_test <- xgb.DMatrix(data = lym_PA_test_data$data,label = lym_PA_test_data$label)
watchlist <- list(train = lym_PA_train, eval = lym_PA_test)
xgb_param <- list(eta = 0.2, max_depth = 6,
subsample = 0.6, num_class = length(table(Idents(lym_ds2_PA))),
objective = "multi:softmax", eval_metric = 'mlogloss')
bst_model <- xgb.train(xgb_param, lym_PA_train, nrounds = 100, watchlist, verbose = 0)
# 特征提取
importance <- xgb.importance(colnames(lym_PA_train), model = bst_model)
head(importance)
xgb.ggplot.importance(head(importance,20),n_clusters = 1) + theme_bw()+theme(
axis.title.x = element_text(size = 15), axis.text.x = element_text(size = 8, colour = "black"),
axis.title.y = element_text(size = 15), axis.text.y = element_text(size = 12, colour = "black"),
legend.text = element_text(size = 20), legend.title = element_blank(), panel.grid = element_blank())
lym_PA_genes <- head(importance, 500) ##选择top500
multi_featureplot(lym_PA_genes$Feature[1:9],lym_ds2_PA,labels = "")
write.csv(lym_PA_genes,"./datatable/lym_PA_features.csv", row.names = F)
#混淆矩阵
predict_lym_PA_test <- round(predict(bst_model, newdata = lym_PA_test))
lym_PA_confuse_matrix_test <- table(lym_PA_test_data$label, predict_lym_PA_test, dnn=c("true","pre"))
lym_PA_confuse_matrix_test_prop <- prop.table(lym_PA_confuse_matrix_test, 1)
lym_PA_confuse_matrix_test_prop
x <- c("PA_lym_0", "PA_lym_1", "PA_lym_2", "PA_lym_3", "PA_lym_4", "PA_lym_5")
y <- c("PA_lym_0", "PA_lym_1", "PA_lym_2", "PA_lym_3", "PA_lym_4", "PA_lym_5")
prop <- as.numeric(lym_PA_confuse_matrix_test_prop)
data <- expand.grid(x = x, y = y) %>% bind_cols(prop = prop)
plot <- ggplot(data, aes(x = x, y = y, colour = prop, size = prop)) +
geom_point()+
scale_size_continuous(range = c(0, 10)) +
labs(x = "clusters", y = "inferred from") + theme_bw()
ggsave("./plots/PAlymmodel.png", plot = plot, device = png, width = 7,height = 6)
lym_AC_data <- get_data_table(lym_ds2_AC, highvar = F, type = "data")
lym_AC_label <- as.numeric(as.character(Idents(lym_ds2_AC)))
colnames(lym_AC_data) <- NULL
lym_AC_test_data <- list(data = t(as(lym_AC_data,"dgCMatrix")), label = lym_AC_label)
lym_AC_test <- xgb.DMatrix(data = lym_AC_test_data$data,label = lym_AC_test_data$label)
predict_lym_AC_test <- round(predict(bst_model, newdata = lym_AC_test))
lym_AC_confuse_matrix_test <- table(lym_AC_test_data$label, predict_lym_AC_test, dnn=c("true","pre"))
lym_AC_confuse_matrix_test_prop <- prop.table(lym_AC_confuse_matrix_test, 1)
lym_AC_confuse_matrix_test_prop
x <- c("PA_lym_0", "PA_lym_1", "PA_lym_2", "PA_lym_3", "PA_lym_4", "PA_lym_5")
y <- c("PA_lym_0", "PA_lym_1", "PA_lym_2", "PA_lym_3", "PA_lym_4")
prop <- as.numeric(lym_AC_confuse_matrix_test_prop)
data <- expand.grid(x = x, y = y) %>% bind_cols(prop = prop)
plot <- ggplot(data, aes(x = x, y = y, colour = prop, size = prop)) +
geom_point()+
scale_size_continuous(range = c(0, 10)) +
labs(x = "clusters", y = "inferred from") + theme_bw()
ggsave("./plots/PAlymmodel_AC.png", plot = plot, device = png, width = 7,height = 6)
xgboost_roc[["auc"]]
adjustedRandIndex(predict_lym_AC_test, lym_AC_test_data$label)
lym_AC_confuse_matrix_test_prop
sankey_plot(lym_AC_confuse_matrix_test,session = "PAtoAC_lym")
ARI = 0.7213791
labels <- lapply(levels(Idents(lym_ds2_PA)), paste0, "_lymPA") %>% as.character()
labels2 <- lapply(levels(Idents(lym_ds2_AC)), paste0, "_lymAC") %>% as.character()
sources <- rep(0:5, each = 5) #注意这里的each和times的区别
colors <- rep(colors_list[1:6], each = 5)
targets <- rep(6:10, times = 6)
plot_ly(type = "sankey", orientation = "h",
node = list(
label = c(labels,labels2),
color = colors_list, pad = 15, thickness = 30,
line = list(
color = "black",
width = 1)),
link = list(
source = sources,
target = targets,
value = as.numeric(lym_AC_confuse_matrix_test),
color = colors
))
umapplot(lym_ds2_AC)
umapplot(lym_ds2_PA)
sankey_plot <- function(confuse_matrix, label1, label2, session = "session")
{
sources <- rep(0:(length(label1)-1), each = length(label2)) #注意这里的each和times的区别
colors <- rep(aero_colors_list[1:length(label1)], each = length(label2))
targets <- rep(length(label1)+0:(length(label2)-1), times = length(label1))
plot_ly(type = "sankey", orientation = "h",
node = list(
label = c(label1,label2),
color = colors_list, pad = 15, thickness = 30,
line = list(color = "black", width = 1)),
link = list(
source = sources, target = targets,
value = as.numeric(confuse_matrix),
color = colors
)) %>% layout(title=session, font=list(family = "Arial",size = 20, color = 'black'))
}
confuse_bubblemat <- function(confuse_matrix_prop, label1, label2, session = "session")
{
prop <- as.numeric(confuse_matrix_prop)
data <- expand.grid(x = label1, y = label2) %>% bind_cols(prop = prop)
plot <- ggplot(data, aes(x = x, y = y, colour = prop, size = prop)) +
geom_point()+
scale_size_continuous(range = c(0, 10)) +
labs(x = "clusters", y = "inferred from") + theme_bw()
ggsave(paste0(session, ".svg"), plot = plot, device = svg, width = 5,height = 4)
}
## 返回最大的概率对应的index
func <- function(s, ident)
{
if(max(s)>1.2/length(ident))
return(ident[which(s == max(s))])
else
return("unassigned")
}
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.